home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.4)
-
- '''FileCorpus.py - Corpus composed of file system artifacts.'''
- import sys
- import gzip
- import email
- from Crypto.Cipher import AES
- from spambayes.Corpus import ExpiryCorpus
- from spambayes.storage import NO_TRAINING_FLAG
- from spambayes.FileCorpus import FileCorpus, FileMessage
- from spambayes.FileCorpus import MessageFactory, GzipFileMessage
- from spamexperts.Options import options
- from spamexperts.OptionsClass import DELAYED
- from spamexperts.message import SEHeaderMessage
-
- class SEFileMessage(FileMessage):
- message_class = SEHeaderMessage
-
- def load(self):
- '''Read the Message substance from the file.'''
- if self.loaded:
- return None
-
- if not self.file_name is not None:
- raise AssertionError, 'Must set filename before using FileMessage instances.'
- pn = self.pathname()
- if options[('globals', 'verbose')]:
- print 'Loading', self.file_name
-
- key = '%.16i' % hash(pn.lower())[:16]
- crypt = AES.new(key)
- fp = gzip.open(pn, 'rb')
-
- try:
- self._msg = email.message_from_string(fp.read(), _class = self.message_class)
- except IOError:
- e = None
- if str(e) == 'Not a gzipped file' or str(e) == 'Unknown compression method':
- fp.close()
- fp = open(self.pathname(), 'rb')
- content = fp.read()
- if len(content) % 16 == 0:
- content = crypt.decrypt(content).rstrip('\x00')
-
- self._msg = email.message_from_string(content, _class = self.message_class)
- fp.close()
- else:
- raise
- except:
- str(e) == 'Unknown compression method'
-
- fp.close()
- self.loaded = True
-
-
- def store(self):
- '''Write the Message substance to the file'''
- if not self.file_name is not None:
- raise AssertionError, 'Must set filename before using FileMessage instances.'
- if options[('globals', 'verbose')]:
- print 'storing', self.file_name
-
- pn = self.pathname()
- key = '%.16i' % hash(pn.lower())[:16]
- crypt = AES.new(key)
- as_string = self.as_string()
- as_string = as_string + '\x00' * (16 - len(as_string) % 16)
- if not len(as_string) % 16 == 0:
- raise AssertionError, 'Must be multiple of 16 (not %d).' % (len(as_string),)
- as_string = crypt.encrypt(as_string)
- fp = open(pn, 'wb')
- fp.write(as_string)
- fp.close()
-
-
-
class SEFileMessageFactory(MessageFactory):
    '''MessageFactory whose `klass` attribute is SEFileMessage.'''

    klass = SEFileMessage
-
-
class SEGzipFileMessage(GzipFileMessage):
    '''GzipFileMessage whose `message_class` is SEHeaderMessage.'''

    message_class = SEHeaderMessage
-
-
class SEGzipFileMessageFactory(MessageFactory):
    '''MessageFactory whose `klass` attribute is SEGzipFileMessage.'''

    klass = SEGzipFileMessage
-
-
- class CarefulFileCorpus(FileCorpus):
- """FileCorpus that is aware of SpamExperts 'delayed' messages and
- blocks removal of these by overwriting the removeMessage method."""
-
- def __init__(self, msg_db, factory, directory, filter, cacheSize):
- """Just store the db that contains the 'delayed' message list"""
- self.blocked_or_delayed_db = msg_db
- FileCorpus.__init__(self, factory, directory, filter, cacheSize)
- self.headers = { }
- self.headersInMemory = []
-
-
- def __len__(self):
- return len(self.msgs)
-
-
- def takeMessage(self, key, fromcorpus, fromCache = False):
- '''Move a Message from another corpus to this corpus.
-
- If we are moving from another cache, pass that information on to
- the removeMessage method.'''
- msg = fromcorpus[key]
- msg.setId(key)
- msg.load()
- msg.message_info_db.load_msg(msg)
- if msg.GetTrained() is None:
- observer_flags = NO_TRAINING_FLAG
- else:
- observer_flags = 0
- fromcorpus.removeMessage(msg, observer_flags = observer_flags, fromCache = fromCache)
- self.addMessage(msg)
-
-
- def removeMessage(self, msg, observer_flags = 0, fromCache = False):
- '''Careful removal.
-
- When not moving, any attempt to delete a delayed message will
- be prevented.'''
- if not fromCache:
- msg_id = msg.getId()
- (account, blockstate) = msg.getBlockingState()
- if options[('globals', 'verbose')]:
- print 'Removing message %s, %s, %s' % (msg_id, account, blockstate)
-
- if blockstate == DELAYED:
- if options[('globals', 'verbose')]:
- print >>sys.stderr, 'removeMessage: Attempt to remove delayed message intercepted. Message %s not removed' % (msg_id,)
-
- return None
-
-
- FileCorpus.removeMessage(self, msg, observer_flags = observer_flags)
-
- headerCacheSize = 1000
- headersToCache = ('From', 'Subject', 'Date', 'Delivery-Date', 'To', 'X-SpamExperts-Date')
-
- def cacheMessageHeaders(self, msg):
- key = msg.key()
- to_cache = { }
- for header in self.headersToCache:
- to_cache[header] = msg[header]
-
- self.headers[key] = to_cache
-
- try:
- self.headersInMemory.remove(key)
- except ValueError:
- pass
-
- self.headersInMemory.append(key)
- if len(self.headersInMemory) > self.headerCacheSize:
- key = self.headersInMemory.pop(0)
-
- try:
- del self.headers[key]
- except KeyError:
- print >>sys.stderr, "Can't expire headers from cache."
- if options[('globals', 'verbose')]:
- import traceback
- traceback.print_exc(None, sys.stderr)
- print >>sys.stderr, 'Key is:', key
- print >>sys.stderr, 'All keys:', self.headers.keys()
-
- except:
- options[('globals', 'verbose')]
-
-
- None<EXCEPTION MATCH>KeyError
-
-
- def cacheMessage(self, msg):
- FileCorpus.cacheMessage(self, msg)
-
- try:
- msg.load()
- except:
- import traceback
- print >>sys.stderr, 'Error occuring trying to load message to cache headers.'
- traceback.print_exc(None, sys.stderr)
- return None
-
- self.cacheMessageHeaders(msg)
-
-
- def get_headers(self, msg):
- '''Return cached headers for a message.'''
- key = msg.key()
-
- try:
- return self.headers[key]
- except KeyError:
- pass
-
- msg.load()
- self.cacheMessageHeaders(msg)
- return self.headers[key]
-
-
-
class CarefulExpiryFileCorpus(ExpiryCorpus, CarefulFileCorpus):
    '''Careful FileCorpus of "young" file system artifacts.'''

    def __init__(self, expireBefore, msg_db, factory, directory, filter='*', cacheSize=250):
        '''Initialise the ExpiryCorpus base with `expireBefore`, then the
        CarefulFileCorpus base with the remaining arguments.'''
        ExpiryCorpus.__init__(self, expireBefore)
        CarefulFileCorpus.__init__(
            self, msg_db, factory, directory, filter, cacheSize)
-
-
-